Warning: this document contains 2 failing validations.

Check that all models were downloaded

Check that all the directories for the .nc files were created

# Check that every CMIP6 source listed in `idx` has a directory under
# data_raw/CMIP6.
source_dl <- dir(here("data_raw", "CMIP6"))

# Convert the source IDs to lower case with underscores instead of hyphens
# before comparing them with the directory names.
source_id <- idx$source_id %>%
  unique() %>%
  str_to_lower() %>%
  str_replace_all("-", "_")

# Fails if any source ID has no matching directory.
stop_if_not(all(source_id %in% source_dl))

Check that all the corresponding .csv files exist

# Check that data/ contains a "<source_id>_data.csv" file for every source ID.
csvs <- list.files(here("data"))

# Fails if any expected .csv file name is absent from the listing.
stop_if_not(all(paste0(source_id, "_data.csv") %in% csvs))

Calculations

Perform necessary calculations to compare PET and SPEI among models and between models and observed.

For PET, I’m using the “energy-only” method proposed by Milly and Dunne (2016) eq. 8:

\[ PET = 0.8(R_n - G) \]

Except that in their notes, they estimate \(R_n - G\) as hfls + hfss after converting to units of mm/day using the latent heat of vaporization of water, given by their eq. 2:

\[ L_v(T) = 2.501 - 0.002361T \] in MJ/kg

For the observed data and the CMIP6 data from the same period, I calculate 3-month SPEI using precipitation and PET. I then use SPEI to categorize months as experiencing mild, moderate, severe, extreme, or no drought. I compare the frequencies between observed and CMIP6 data with a Chi-squared goodness-of-fit test (p < 0.05 = dissimilar frequencies).

Comparison to Observed

Comparison of observed data to CMIP6 'historical' output
Data are restricted to 1980 to 2015 to match the observed record. Results under 'Seasonality' refer to means for each month. Results under 'Historical Drought Freq' use SPEI3 to categorize drought into no drought (blue), mild (yellow), moderate (orange), severe (dark orange), and extreme (red).
Source Mean annual pr (∆obs) [mm/yr]3 Seasonality1 Historical Drought Freq.2
precip temp pr tas tasmin tasmax drought freq. Chi-squared
observed4 2341 (0) 1.00 1.00 1.00 1.00 1.000
access_cm2 1606 (-735) 0.03 −0.16 0.34 0.03 0.380
access_esm1_5 2179 (-162) 0.69 0.43 0.63 0.50 0.088
awi_cm_1_1_mr 1899 (-442) 0.80 0.49 0.65 0.53 0.068
bcc_csm2_mr 887 (-1454) 0.17 0.38 0.62 0.43 0.131
canesm5 1211 (-1130) 0.43 0.10 0.50 0.17 0.034
ciesm 3 (-2338) 0.95 0.73 0.62 0.48 0.324
cmcc_esm2 1823 (-518) 0.73 0.46 0.72 0.50 0.056
ec_earth3 1809 (-532) 0.31 0.06 0.53 0.15 0.209
ec_earth3_veg_lr 1789 (-552) 0.27 0.04 0.45 0.18 0.859
fgoals_g3 918 (-1423) 0.92 0.62 0.48 0.76 0.636
fio_esm_2_0 2244 (-97) 0.90 0.68 0.57 0.75 0.316
gfdl_esm4 1782 (-559) 0.78 0.49 0.85 0.52 0.362
inm_cm4_8 2564 (223) 0.87 0.15 0.43 0.50 0.283
inm_cm5_0 2721 (380) 0.92 0.03 0.27 0.50 0.174
ipsl_cm6a_lr 2149 (-192) 0.90 0.69 0.55 0.69 0.991
miroc6 1982 (-359) 0.80 0.51 0.71 0.41 0.840
mpi_esm1_2_lr 1551 (-790) 0.66 0.29 0.40 0.34 0.222
mri_esm2_0 2967 (626) 0.83 0.24 0.52 0.57 0.001
nesm3 1643 (-698) −0.06 −0.27 0.17 −0.27 0.168

1 Spearman's rho. Rho < 0.45 highlighted in red.

2 p-value from Chi-squared goodness-of-fit test comparing frequency of categories of drought to observed. A smaller p-value means more dissimilar frequencies.

3 Red indicates mean annual precipitation not within ± 20% of observed

4 Observed data from Xavier et al. (2016)

CMIP model details

Below are validation reports and plots of all data downloaded from each CMIP6 source.

access_cm2

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

access_esm1_5

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

awi_cm_1_1_mr

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

bcc_csm2_mr

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

canesm5

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

ciesm

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

## Error: The `col_vals_expr()` validation failed beyond the absolute threshold level (1). ## * failure level (5076) >= failure threshold (1)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

## Error: Exceedance of failed test units where values in `tasmax` should have been between `10` and `55`. ## The `col_vals_between()` validation failed beyond the absolute threshold level (1). ## * failure level (225) >= failure threshold (1)

cmcc_esm2

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

ec_earth3

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

ec_earth3_veg_lr

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

fgoals_g3

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

fio_esm_2_0

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

gfdl_esm4

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

inm_cm4_8

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

inm_cm5_0

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

ipsl_cm6a_lr

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

miroc6

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

mpi_esm1_2_lr

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

mri_esm2_0

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)

nesm3

# Validation checks on df_list[[i]] (pointblank).

# All expected variable columns are present.
col_exists(
  df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)

# Every listed scenario appears in experiment_id.
col_vals_make_set(
  df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)

# None of the variable columns contain missing values.
col_vals_not_null(
  df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)

# The largest pr value exceeds 200 mm/month.
col_vals_expr(
  df_list[[i]],
  expr = ~ max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)

# Temperatures lie between 10 and 55 once the tas* columns are converted to
# plain numerics (presumably degrees Celsius -- confirm units).
col_vals_between(
  df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~ . %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)